**Housekeeping
clear all
cap log close
log using ${rep_root}/logs/supervisor_observables.log, text replace
set more off

import excel using ${rep_root}/data/FOIA/IPRA_personnel.xlsx, first

split Name, p(", ")

gen NAME = Name2 + " " + Name1
rename HireDateIPRA DATEOFHIRE
keep NAME DATEOFHIRE HireDateCity DateLeft
replace NAME = upper(NAME)
drop if NAME == " "
tempfile ipra
save `ipra'

clear
import excel using ${rep_root}/data/FOIA/COPA_personnel.xlsx, first

gen NAME = FIRSTNAME + " " + LASTNAME

keep GENDER RACE YEAROFBIRTH DATEOFHIRE NAME
rename DATEOFHIRE DATEOFHIRE_COPA
gen copa_personnel = 1
drop if NAME == ""
merge 1:1 NAME using `ipra', nogen
replace DATEOFHIRE = DATEOFHIRE_COPA if DATEOFHIRE ==.
replace copa_personnel = 0 if copa_personnel==.
sort NAME
gen idusing = _n 

tempfile personnel
save `personnel'

clear
insheet using "${rep_root}/data/FOIA/CLEAR CR Status Data.csv" 
tempfile cr_temp
save `cr_temp'

clear
insheet using "${rep_root}/data/FOIA/CLEAR INFO Status Data.csv" 
append using `cr_temp'

by created_by, sort: keep if _n == 1
drop if position == "POLICE OFFICER"
drop if position == "SERGEANT OF POLICE"
drop if position == "LIEUTENANT OF POLICE"
drop if position == "PO AS DETECTIVE"
drop if position == "COMMANDER"
drop if position == "POLICE AGENT"
rename created_by NAME
sort NAME
gen idmaster = _n

reclink NAME using `personnel', idmaster(idmaster) idusing(idusing) gen(score)  
sort score
duplicates report idmaster

keep score NAME UNAME position race gender RACE GENDER YEAROFBIRTH DATEOFHIRE copa_personnel HireDateCity DateLeft

outsheet using ${rep_root}/data/personnel_info.csv, replace name comma


 
 
  
***************************************************************************** 
**B. Bring in investigator/supervisor info in the status data 
 



***********************
**Bringing in case status data and make categories by status  
***********************



clear
insheet using "${rep_root}/data/FOIA/CLEAR CR Status Data.csv" 
tempfile cr_temp
save `cr_temp'

clear
insheet using "${rep_root}/data/FOIA/CLEAR INFO Status Data.csv" 
append using `cr_temp'

tab status

  

//order chronologically
gen ampm = substr(status_time, -2,2)
tab ampm
gen hr = substr(status_time, 1, strpos(status_time,":")-1)
tab hr
gen min = substr(status_time, strpos(status_time,":")+1, 2)
destring hr min, replace
replace hr = hr + 12 if hr<12 & ampm=="PM" 
replace hr = 0 if hr==12 & ampm=="AM"



gen status_time_str = status_date+ " " + status_time
gen status_dt = date(status_date, "MDY", 2025)
gen status_yr = year(status_dt) 
gen status_month = month(status_dt) 
gen dow = dow(status_dt)
sort cr_number status_dt hr min    

**Get intake time for each case
preserve
sort cr_number status_dt hr min
by cr_number: keep if _n==1

keep cr_number status_dt hr min dow
rename status_dt intake_dt
rename hr hr_intake
rename min min_intake
rename dow dow_intake

tempfile intake
save `intake'
restore
 


sort cr_number
merge m:1 cr_number using `intake', nogen

 

**Create wide list of staff marked as pending assign team
preserve
keep if status=="PENDING ASSIGN TEAM"

sort cr_number
by cr_number: gen idx = _n
rename created_by intake_sup_

sum idx
local idx_max = r(max)

keep cr_number idx intake_sup_
reshape wide intake_sup_, i(cr_number) j(idx)

tempfile intake_wide
save `intake_wide'
restore 
 

***********************
**Bring in data on PERSONNEL WHO ARE NOT REALLY SUPERVISORS USING A VERIFIED LIST
***********************
 

//bring in list of investigating supervisors, and filter out those that are not. 
rename created_by status_sup  
replace status_sup = "SHANNON B HAYES" if status_sup == "SHANNON HAYES" //correct data error in name
 

preserve
clear
insheet using ${rep_root}/data/supervisors_identified.csv
tempfile sup_identified
save `sup_identified'
restore

merge m:1 status_sup using `sup_identified'
replace sup_identified = 0 if sup_identified !=1 
drop _merge


keep if inlist(agency, "IPRA", "OPS", "COPA")

 
//bring manually corrected personnel data
preserve
clear
local ver_suff _9_21
insheet using ${rep_root}/data/personnel_info_database.csv
rename name status_sup
tempfile personnel_data
save `personnel_data'
restore

replace status_sup = "SHANNON HAYES" if status_sup == "SHANNON B HAYES"  
 
merge m:1 status_sup using `personnel_data', keepusing(hiredatecity dateofhire dateleft)
 

drop _merge


**identify whether the first supervisor is an identified supervisor
preserve
keep if status == "PENDING INVESTIGATION"
sort cr_number status_dt hr min  
by cr_number: keep if _n == 1
keep cr_number sup_identified status_sup race gender
rename sup_identified first_sup 
rename status_sup sup_name
rename race sup_race
rename gender sup_gender
tempfile first_sup
save `first_sup'
restore
merge m:1 cr_number using `first_sup', nogen
replace first_sup = 0 if first_sup!=1




**Restrict to years where cases were being randomly assigned
gen intake_yr = year(intake_dt) 
gen intake_mo = month(intake_dt)

 
//keep if intake_yr>=2006 
//keep if inrange(intake_yr, 2006, 2007) | inrange(intake_yr, 2015, 2018) | (intake_yr==2008 & intake_mo<=7) | (intake_yr==2014 & intake_mo>=9)   
 
 
 

gen intake_yearmo = ym(intake_yr, intake_mo)
format %tm intake_yearmo 

gen lead_team = status == "PENDING INVESTIGATION"

by cr_number, sort: egen assigned_team = max(lead_team)

by cr_number, sort: egen identified_sup = max(first_sup)


preserve
collapse (max) assigned_team (max) first_sup, by(cr_number intake_dt)

tab assigned_team //there are 9973 complaints that are brought in the status; 2,052 are not sent to investigation team
restore
 

preserve
//take the person who is in the role of "waiting for their investigator to investigate"  

keep if status == "PENDING INVESTIGATION"


 
//take the first supervising investigator
sort cr_number status_dt hr min  
by cr_number: keep if _n == 1


//drop if not an identified supervisor
drop if sup_identified !=1



keep cr_number status_sup intake_dt //sup_identified

tab status_sup

sort cr_number
merge 1:1 cr_number using `intake_wide', keep(1 3) nogen

gen byintake = 0 

forvalues i = 1/`idx_max'{
	//drop if status_sup==intake_sup_`i'
	replace byintake = 1 if status_sup==intake_sup_`i'
}





sort status_sup 
egen inv_status_id = group(status_sup) 

gen sup_sample = 1

tempfile inv_in
save `inv_in'
restore

merge m:1 cr_number using `inv_in'

keep if _merge == 3 //| _merge == 1
drop _merge
 
 
 
 
 
*************************************************************************************************** 
**C. Generate superivsor-year level stats 


//restrict to cases handled by the supervisors  
keep if sup_sample == 1  
gen status_yearmo = ym(status_yr, status_month)
format %tm status_yearmo
 
 
 
//total number of cases in a year 
preserve 
sort cr_number status_dt hr min 
keep if status == "PENDING INVESTIGATION" 
by cr_number, sort: keep if _n== 1
gen investigation = 1 
drop if inv_status_id ==.
collapse (sum) investigation, by(inv_status_id status_yr)  
tempfile cases_total
save `cases_total'
restore 
 
merge m:1 inv_status_id status_yr using `cases_total', nogen keep(1 3)


//monthly number of investigators in a team for each supervisor-year

preserve 

sort cr_number status_dt hr min 
keep if status == "PENDING INVESTIGATIVE REVIEW" 
by cr_number, sort: keep if _n== 1


egen tag = tag(inv_status_id status_yearmo status_sup) 
egen distinct = total(tag), by(inv_status_id status_yearmo)

by inv_status_id intake_yearmo, sort: keep if _n == 1
 
collapse (mean) distinct, by(inv_status_id status_yr)  
tempfile monthly_avg_investigator
save `monthly_avg_investigator'
restore 

merge m:1 inv_status_id status_yr using `monthly_avg_investigator', nogen keep(1 3)


//number of investigators who newly joined in a team for each supervisor-year



preserve 

sort cr_number status_dt hr min 
keep if status == "PENDING INVESTIGATIVE REVIEW" 
by cr_number, sort: keep if _n== 1

 

//calculate tenure of the team: prioritize IPRA hire dates when available, unless they are pinned to 9/5/2007, in which case we use the City hire date in the same dataset
 
 
gen hire_dt = date(dateofhire, "MDY", 2020)
gen hire_dt_city = date(hiredatecity, "MDY", 2020)
gen hire_year = year(hire_dt)
gen hire_year_city = year(hire_dt_city)
replace hire_year = hire_year_city if dateofhire == "9/5/07"

 
 
gen tenure_yr = status_yr-hire_year+1

sum tenure_yr, d

egen tag = tag(inv_status_id status_yearmo status_sup) 
keep if tag == 1
 
collapse (mean) tenure_yr, by(inv_status_id status_yearmo status_yr) 

 
 
collapse (mean) tenure_yr, by(inv_status_id status_yr)  
tempfile monthly_tenure
save `monthly_tenure'
restore 


merge m:1 inv_status_id status_yr using `monthly_tenure', nogen keep(1 3)


******************************

**summarize supervisor-year level stats

 
keep if sup_name == status_sup
keep inv_status_id investigation status_yr distinct tenure_yr dateofhire sup_race sup_gender sup_name hiredatecity

//calculate tenure of the supervisor: prioritize IPRA hire dates when available, unless they are pinned to 9/5/2007, in which case we use the City hire date in the same dataset
gen hire_dt = date(dateofhire, "MDY", 2020)
gen hire_dt_city = date(hiredatecity, "MDY", 2020)
gen hire_year = year(hire_dt)
gen hire_year_city = year(hire_dt_city)
replace hire_year = hire_year_city if dateofhire == "9/5/07"
 
gen sup_tenure_yr = status_yr-hire_year+1

by inv_status_id status_yr, sort:keep if _n == 1
gen sup_nonwhite = sup_race != "WHI"
gen sup_female = sup_gender == "F"

 
rename distinct avg_investigators 
rename tenure_yr avg_tenure //tenure of the subordinates
rename status_yr intake_yr
rename inv_status_id inv_id
gen caseload = investigation/avg_investigators 

tempfile sup_stats
save `sup_stats'






*************************************************************************************************** 
**D. Merge to affidavit LOM and Relate the characteristics with the affidavit LOMs
******************************* 

 



 
clear  
local cutoff = 50
local seed = 9999 
local case_lb = `cutoff'

local covarsmin d_mo_* d_dist_*
local covars vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4 d_mo_* d_dist_* arrests_pre complaints_pre force_pre any_prior_complaint tenure  
 
 
*****************
**BRING IN CASE STATUS + COMPLAINTS DATA AND APPLY DATA RESTRICTION
***************** 
 
do ${rep_root}/code/data_restrict.do

*****************
**MERGE WITH OUTCOMES DATA
***************** 

merge 1:1 cr_id acc_id using ${rep_root}/data/outcomes, keep(3) nogen 

**drop if accused are not police officers
//drop if acc_rank != "POLICE OFFICER"

*****************
**DEFINE TREATMENT AND MAKE COVARIATES
***************** 


**Drop investigators without enough cases
tab inv_id
sort inv_id 
by inv_id: gen inv_ct = _N
drop if inv_ct<`case_lb'
tab inv_id intake_yr

**Define some covariates
gen po = acc_rank == "POLICE OFFICER"
gen minor_inj = vic_inj_1<=1
gen vic_old = (year(inc_complaint_dt_1) - vic_byr_oldest)>=35 & !missing(vic_byr_oldest)
gen acc_old = acc_age>=35 & !missing(acc_age)
gen cat_3 = acc_cat==3
gen cat_4 = acc_cat==4
 
   
  

forvalues i = 1/25{
	gen d_dist_`i' = inc_district_1==`i'
}
forvalues yr = 2006/2008{
	gen d_yr_`yr' = intake_yr==`yr'

	forvalues m = 1/12{
		gen d_mo_yr_`m'_`yr' = intake_yr==`yr' & intake_mo==`m'
	}
	
}
forvalues yr = 2014/2018{
	gen d_yr_`yr' = intake_yr==`yr'
	
	forvalues m = 1/12{
		gen d_mo_yr_`m'_`yr' = intake_yr==`yr' & intake_mo==`m'
	}
}

gen d_noaff = finding_cd=="NO AFFIDAVIT"
gen d_unfound = finding_cd=="UNFOUNDED"
gen d_nosus = finding_cd=="NOT SUSTAINED"
gen d_exon = finding_cd=="EXONERATED"
gen d_sus = finding_cd=="SUSTAINED"
gen d_addlinv = finding_cd=="ADDITIONAL INVESTIGATION REQUESTED"

qui foreach var in vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4 complaints_pre complaints_pre force_pre any_prior_complaint tenure complaints_postc1_3 serious_postc1_3 complaints_postc1_3 merit_postc1_3 d_noaff d_unfound d_nosus d_exon d_sus d_addlinv{
	sum `var'
	*local temp_m = r(mean)
	local ss_`var'_m : di %6.3f r(mean)
	*local temp_sd = r(sd)
	local ss_`var'_sd : di %6.3f r(sd)
}

**Define treatment
 

gen treat_affidavit = finding_cd != "NO AFFIDAVIT"    
gen treat_sustain = finding_cd == "SUSTAINED"   

 


foreach t in treat_affidavit  treat_sustain{

**Create LOM
capture drop resid cr_resid cr_ct inv_tot_resid case inv_ct2  
qui reg `t' `covars'
predict resid, residuals

sort cr_id
by cr_id: egen cr_resid = total(resid)
by cr_id: gen cr_ct = _N

sort inv_id intake_yr intake_mo
gen case = 1
by inv_id intake_yr: egen inv_tot_resid = total(resid)
by inv_id intake_yr: egen inv_ct2 = total(case)
gen lom_`t' = (inv_tot_resid - cr_resid)/(inv_ct2-cr_ct)

 


sort inv_id
by inv_id: sum lom_`t'
}

merge m:1 inv_id intake_yr using `sup_stats'


keep if _merge == 3 & !missing(lom_treat_affidavit)
drop _merge

 
collapse (mean) lom_treat_affidavit caseload avg_tenure sup_tenure_yr sup_nonwhite sup_female, by(inv_id intake_yr)

 
		
foreach var of varlist caseload avg_tenure sup_tenure_yr sup_nonwhite sup_female{
	
	reg lom_treat_affidavit `var' i.intake_yr , cluster(inv_id)
		local beta`var' = _b[`var']
		local beta`var' : di %6.3f `beta`var''
		
		local r2 = e(r2)
		local r2_`var' : di %6.3f `r2'
		
		matrix v`var' = e(V)
		
		
		local se`var' = sqrt(v`var'[1,1])
		local se`var' : di %6.3f `se`var''
		
		
		scalar N= e(N)
		local N`var' = N
		local N`var' : di %7.0fc `N`var''
		
		//boottest `var', nograph seed(`seed')
		matrix b = r(table)
		local p`var' = b[4,1]
 
		//local p`var' = r(p)
		display r(p)


		//get stars based on bootstrapped p values 
		glo star_`var' = cond(`p`var'' <0.01, "***", cond(`p`var'' <0.05, "**", cond(`p`var'' <0.1, "*", "")))
		local p`var' : di %6.3f `p`var'' 
	
}
 
 
 reg lom_treat_affidavit caseload avg_tenure sup_tenure_yr sup_nonwhite sup_female i.intake_yr , cluster(inv_id)

texdoc init ${rep_root}/output/tables/cor_affidavit.tex, replace force
tex \begin{tabular}{l*{3}{c}}
tex \hline\hline
tex \multicolumn{1}{c}{Covariate}&\multicolumn{1}{c}{Affidavit LOM}&\multicolumn{1}{c}{Obs.} &\multicolumn{1}{c}{R-Squared} \\
tex \hline
tex Total Cases Relative to Team Size & `betacaseload'${star_caseload}& `Ncaseload' &`r2_caseload'  \\
tex 	& [`pcaseload'] \\
tex Average Investigator Tenure & `betaavg_tenure'${star_avg_tenure}& `Navg_tenure' &`r2_avg_tenure'\\
tex 	& [`pavg_tenure'] \\
tex Supervisor Tenure & `betasup_tenure_yr'${star_sup_tenure_yr}& `Nsup_tenure_yr'&`r2_sup_tenure_yr' \\
tex 	& [`psup_tenure_yr'] \\
tex Supervisor Non-White & `betasup_nonwhite'${star_sup_nonwhite}& `Nsup_nonwhite' &`r2_sup_nonwhite'\\
tex 	& [`psup_nonwhite'] \\
tex Supervisor Female & `betasup_female'${star_sup_female}& `Nsup_female'&`r2_sup_female' \\
tex 	& [`psup_female'] \\
tex \hline\hline
tex \end{tabular}
texdoc close 
  
  
 
  